# -*- coding: utf-8 -*-
"""Decision_Tree.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1xvBcp5tNh3Dcn30qOjQw-CQ4wDOG0sQA
"""

from io import StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pydotplus
import seaborn as sns
from IPython.display import Image, display
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, export_graphviz
# Fetch the Iris dataset and pull out the pieces used by the rest of the script.
iris = load_iris()
X, y = iris.data, iris.target
feature_names, target_names = iris.feature_names, iris.target_names

# Tabular view of the features plus the numeric class label, for inspection only;
# the models below are trained on X and y directly.
df = pd.DataFrame(X, columns=feature_names)
df['species'] = y

# Quick summary: first rows, label/feature names and samples per class.
print("Dataset Head:", df.head(), sep="\n")
print("\nTarget Names:", target_names)
print("Feature Names:", feature_names)
print("Class Distribution:", np.bincount(y))
# Hold out 30% of the samples for testing; the split is stratified on y and
# seeded so that repeated runs produce the same partition.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

# Report the size of each partition, one per line.
print(
    f"X_train shape: {X_train.shape}",
    f"X_test shape: {X_test.shape}",
    f"y_train shape: {y_train.shape}",
    f"y_test shape: {y_test.shape}",
    sep="\n",
)
# Train two Decision Tree classifiers on the same training split.
#
# Hyperparameters worth knowing:
# - criterion: "gini" (Gini impurity, the default) or "entropy" (information gain).
# - max_depth: upper bound on tree depth; a smaller value limits overfitting.
# - min_samples_split: fewest samples a node needs before it may be split.
# - min_samples_leaf: fewest samples allowed in a leaf.
# - random_state: seed, set here for reproducibility.

# Baseline: all defaults, so the tree grows without a depth limit (can overfit).
# fit() returns the estimator itself, so construction and training are chained.
model_default = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
print("Default Model training complete.")

# Regularised variant: depth capped at 3 to curb overfitting.
model_tuned = DecisionTreeClassifier(max_depth=3, random_state=42).fit(X_train, y_train)
print("Tuned Model (max_depth=3) training complete.")
# Predict the held-out test set with both trained models.
y_pred_default = model_default.predict(X_test)
y_pred_tuned = model_tuned.predict(X_test)

# Show the first five predictions of the default model next to the true labels.
print(
    "\nDefault Model - Predicted class labels (first 5 samples):\n",
    y_pred_default[:5],
)
print("True class labels (first 5 samples):\n", y_test[:5])

# Same preview for the depth-limited model.
print(
    "\nTuned Model - Predicted class labels (first 5 samples):\n",
    y_pred_tuned[:5],
)
print("True class labels (first 5 samples):\n", y_test[:5])
# Shared evaluation routine: both models are scored and plotted the same way,
# so the logic lives in one helper instead of two copied sections.
def _evaluate_model(header, y_true, y_pred, class_labels, cmap, plot_title):
    """Print metrics for one model and show its confusion-matrix heatmap.

    Args:
        header: Banner line printed before the metrics.
        y_true: True class labels of the evaluated samples.
        y_pred: Labels predicted by the model for the same samples.
        class_labels: Class names used in the classification report and as
            tick labels on both heatmap axes.
        cmap: Colormap name passed to the seaborn heatmap.
        plot_title: Title of the confusion-matrix figure.

    Returns:
        A tuple ``(accuracy, confusion_matrix, classification_report)``.
    """
    print(header)
    accuracy = accuracy_score(y_true, y_pred)
    print(f"Accuracy: {accuracy:.2f}")

    matrix = confusion_matrix(y_true, y_pred)
    print("\nConfusion Matrix:\n", matrix)

    report = classification_report(y_true, y_pred, target_names=class_labels)
    print("\nClassification Report:\n", report)

    # Heatmap of the confusion matrix: rows are true labels, columns predictions.
    plt.figure(figsize=(8, 6))
    sns.heatmap(matrix, annot=True, fmt='d', cmap=cmap,
                xticklabels=class_labels,
                yticklabels=class_labels)
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.title(plot_title)
    plt.show()

    return accuracy, matrix, report


# Evaluate Default Model
accuracy_default, conf_matrix_default, class_report_default = _evaluate_model(
    "\n--- Evaluation for Default Decision Tree Model ---",
    y_test,
    y_pred_default,
    target_names,
    cmap='Blues',
    plot_title='Confusion Matrix (Default Decision Tree)',
)

# Evaluate Tuned Model (max_depth=3)
accuracy_tuned, conf_matrix_tuned, class_report_tuned = _evaluate_model(
    "\n--- Evaluation for Tuned Decision Tree Model (max_depth=3) ---",
    y_test,
    y_pred_tuned,
    target_names,
    cmap='Greens',
    plot_title='Confusion Matrix (Tuned Decision Tree - max_depth=3)',
)
# Visualize the Tuned Decision Tree (max_depth=3)
# export_graphviz writes the DOT description of the tree into an in-memory
# text buffer, which pydotplus then turns into a graph object.
dot_data = StringIO()
export_graphviz(model_tuned, out_file=dot_data,
                filled=True, rounded=True,
                special_characters=True,
                # Passed as plain lists rather than NumPy arrays.
                # NOTE(review): some scikit-learn releases appear to validate
                # these parameters strictly as lists -- confirm for the
                # installed version.
                feature_names=list(feature_names),
                class_names=list(target_names))
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())

# A bare `Image(...)` expression is only rendered when it happens to be the
# last expression of a notebook cell; anywhere else its value is discarded.
# display() asks IPython to render the PNG explicitly.
display(Image(graph.create_png()))

# You can also save the image to a file
# graph.write_png("iris_decision_tree_tuned.png")
# print("Decision tree saved to iris_decision_tree_tuned.png")